
*Set working directory
* NOTE(review): the path below is an empty placeholder. All later file references are relative
* ("Forecast2024_Replication.dta", "Electoral_Votes_Final.dta", "SimulationData\..."), so this
* presumably needs to point at the folder that holds those files before the script is run.
cd ""

****************************************
***Simulations
***2024 Electoral College Forecast
****************************************
*NEED TO INSTALL
	*ssc install clarify
	* install gr0034.pkg  (labmask)
	*ssc install stripplot
*****************************************

* Fix the random-number seed once, before the loop, so the complete run is reproducible
set seed 453890886

* Window of weight parameters: one model for every integer from d-3 to d+3
local d = 55
local min = `d' - 3
local max = `d' + 3

* One full simulation pass per weight parameter p (the loop closes after the histogram data are saved)
forvalues p = `min'/`max' {

	* Every pass starts again from the replication data
	use "Forecast2024_Replication.dta", clear

	local forecastyear = 2024

	* Reverse the sign of the weighted measure Cwt_p_d_ for observations with a Republican incumbent
	replace Cwt_`p'_d_ = -Cwt_`p'_d_ if republican_incumbent == 1

* As discussed in Appendix 2.1, the variance of economic conditions in 2020 was almost 10X that of
* previous years due to COVID. To keep these extreme values from unduly influencing future forecasts,
* the 2020 values are rescaled so that their standard deviation equals the average standard deviation
* of the three previous elections (2008, 2012, 2016), while the 2020 mean is left unchanged.

* Collect the mean and standard deviation of the measure for each election 2008-2020
forvalues elecyr = 2008(4)2020 {
	summarize Cwt_`p'_d_ if year == `elecyr'
	local sd`elecyr' = `r(sd)'
	local mean`elecyr' = `r(mean)'
	display `sd`elecyr''
}

* Target standard deviation: average over the three pre-2020 elections
local avgsd = (`sd2008' + `sd2012' + `sd2016') / 3

* Standardize the 2020 values to the 2020 mean and the target standard deviation, then write them back
egen stdecon2020_`p' = std(Cwt_`p'_d_) if year == 2020, mean(`mean2020') sd(`avgsd')
summarize stdecon2020_`p'
replace Cwt_`p'_d_ = stdecon2020_`p' if year == 2020

* Number of simulated coefficient draws per weight model
local sims = 10000

* Approval measure used for the forecast-year prediction. It starts as a copy of the
* forecast-year approval variable; the shift that accounts for Harris entering the race is
* then added to the forecast-year rows only.
gen vpadj_pres_app_shift_`forecastyear' = pres_approval_`forecastyear'

* FIX: the original statement built the variable name from a macro called yr, which is never
* defined. The name therefore ended in a bare underscore and only resolved through Stata's
* variable-name abbreviation (it breaks under "set varabbrev off" or if a second variable with
* the same prefix appears). The name and the year now both come from forecastyear, and the
* size of the shift is held in one named local.
local vp_shift = 8.2
replace vpadj_pres_app_shift_`forecastyear' = vpadj_pres_app_shift_`forecastyear' + `vp_shift' if year == `forecastyear'

* Make sure only the necessary years are included
drop if year < 1976

* Constant term, so the intercept can be part of the predictor matrix
gen cons = 1

* Order the data for the matrices built later (dependent variable first, predictors in model order)
order PerDem2Party lag_PerDem2Party_dev vpadj_pres_app_shift_`forecastyear' Cwt_`p'_d_ Anderson Perot2 democrat_p_state lag_democrat_p_state democrat_vp_state cons

* Predictor matrix for the forecast year only: one row per state (rows named by State),
* one column per predictor, used to generate the predicted values for that year
mkmat lag_PerDem2Party_dev vpadj_pres_app_shift_`forecastyear' Cwt_`p'_d_ Anderson Perot2 democrat_p_state lag_democrat_p_state democrat_vp_state cons if year == `forecastyear', matrix(Variables) rownames(State)

	* Fit the model on elections after 1976 and before the forecast year, and keep its
	* root mean squared error as a scalar for the forecast-error step further down
	local rhs_fit lag_PerDem2Party_dev pres_approval_`forecastyear' Cwt_`p'_d_ Anderson Perot2 democrat_p_state lag_democrat_p_state democrat_vp_state
	local fit_sample year > 1976 & year < `forecastyear'

	regress PerDem2Party `rhs_fit' if `fit_sample'
	scalar rmse = `e(rmse)'

	* Simulate the coefficients with estsimp (ssc install clarify), using exactly the same
	* specification and estimation sample as the regression above
	estsimp reg PerDem2Party `rhs_fit' if `fit_sample', sims(`sims')

	* The predictor variables are no longer needed once the simulations exist. Dropping them
	* frees their names so the simulated coefficient columns can be renamed after them
	* (instead of b1, b2, ...), which keeps row and column names aligned in the matrix product.
	drop cons lag_PerDem2Party_dev vpadj_pres_app_shift_`forecastyear' Cwt_`p'_d_ Anderson Perot2 democrat_p_state lag_democrat_p_state democrat_vp_state

* Rename the simulated coefficient columns b1-b9 after the predictors they belong to, so that
* the column names of the parameter matrix equal the column names of the predictor matrix.
* Note: b2 does not include the shift (it is estimated on pres_approval_`forecastyear'), but it is
* named vpadj_pres_app_shift_`forecastyear' because the shifted variable is used to generate the forecast.
local coef_names lag_PerDem2Party_dev vpadj_pres_app_shift_`forecastyear' Cwt_`p'_d_ Anderson Perot2 democrat_p_state lag_democrat_p_state democrat_vp_state cons
local k = 0
foreach nm of local coef_names {
	local ++k
	rename b`k' `nm'
}

* Rows without a simulated draw must not enter the parameter matrix
drop if cons == .

* Matrix of simulated coefficients: one row per draw, columns named like the predictors
set matsize 11000
mkmat `coef_names', matrix(Parameters)
	
* Multiply the simulated coefficients (draws x predictors) by the transposed forecast-year
* predictor matrix (predictors x states): every column of Uncertainty holds the simulated
* predictions for one state
matrix define Uncertainty = Parameters*Variables'

* Turn the matrix into variables named after its columns (the State row names of Variables)
svmat Uncertainty, names(col)

* Keep ONLY the simulated state columns.
* FIX: the original dropped the range lag_PerDem2Party_dev-b10. That range starts at the first
* simulated coefficient, so the dependent variable (placed first by the earlier -order-) and every
* other original variable stored before it stayed in memory. Once Alabama is moved to the front,
* those leftovers lie inside the Alabama - Wyoming range that the following steps loop over.
* Selecting the columns by name does what the original comment intended ("drop all other variables")
* regardless of variable positions.
local state_cols : colnames Uncertainty
keep `state_cols'

* Put the state columns in alphabetical order (Alabama first, Wyoming last). This is the order the
* original chain of individual -order- statements produced, but it no longer depends on the order
* in which the states happen to be stored in the input data.
* NOTE(review): the electoral-vote matrix built later is multiplied position by position with
* these columns, so it presumably has to be in the same alphabetical order - confirm against
* Electoral_Votes_Final.dta.
order _all, alphabetic
	
* Incorporate forecasting error
* The seed is set once before the loop, so the draws below differ across the weight models.
* The standard deviation of the error equals the RMSE stored as a scalar above.
local sd = scalar(rmse)

* Draw the common correlation between state errors for this model, uniformly from [0.70, 0.97]
* (potential range = ~50% common variance - ~95% common variance)
local u = (0.97 - 0.7)*runiform()+ .7
di `u'

* One error variable per simulated state column, named E_<state>, in dataset order.
* Deriving names and dimensions from the data replaces the hand-typed 51-element lists.
local err_vars
foreach st of varlist Alabama - Wyoming {
	local err_vars `err_vars' E_`st'
}
local n_units : word count `err_vars'

* Means are 0 and every standard deviation equals the RMSE-based value set above
matrix m = J(1, `n_units', 0)
matrix sd = J(1, `n_units', `sd')

* Correlation matrix: u everywhere off the diagonal, 1 on the diagonal
matrix c = J(`n_units', `n_units', `u')
forvalues diag = 1/`n_units' {
	matrix c[`diag',`diag'] = 1
}

* Generate the correlated errors.
* FIX: the number of observations was hard-coded as 10000; it now follows the sims local so the
* errors always line up with the simulated predictions if the number of simulations is changed.
corr2data `err_vars', n(`sims') corr(c) means(m) sds(sd) cstorage(full)

* Add the forecast error to the simulated predictions (which already incorporate model error)
foreach var of varlist Alabama - Wyoming {
	gen `var'_forecast = `var' + E_`var'
}
*******************

* Store the simulated state forecasts of this model in the separate folder "SimulationData"
save "SimulationData\Uncertainty_estimates_`forecastyear'_`p'.dta",  replace

* Electoral-vote allocation: keep only the forecast year
use "Electoral_Votes_Final.dta", clear
local forecastyear = 2024
keep if year == `forecastyear'

* Only the state identifier and the vote count are needed
* (the state order has to be the same across datasets)
keep state_initnum Votes

* Votes needed to win: half of the total plus one. 5 is added to the total for the
* Nebraska and Maine district allocations, which are handled separately later on.
egen total_EC = total(Votes)
gen cut_off = (total_EC + 5)/2 + 1

* Matrix of available electoral votes, one row per state
mkmat state_initnum Votes, matrix(EC_votes)

* Keep the cut-off as a one-observation file in "SimulationData" so it can be appended later
keep cut_off
keep in 1
save "SimulationData\total_EC_`forecastyear'_`p'.dta", replace


* Reload the simulated forecasts of this model
use "SimulationData\Uncertainty_estimates_`forecastyear'_`p'", clear

* Drop everything from Alabama through E_Wyoming (predictions without forecast error and the
* error draws), then let the *_forecast variables take over the plain state names
drop Alabama-E_Wyoming
rename *_forecast *

* Indicator per state: 1 when the simulated Democratic two-party share is above 50, otherwise 0
foreach st of varlist Alabama-Wyoming {
	gen `st'_demwin = (`st' > 50)
	label var `st'_demwin "Democrats win 2Party Vote in `st'"
}

* Matrix of the 0/1 indicators (draws x states)
mkmat Alabama_demwin-Wyoming_demwin, matrix(state_simulations)

* Multiply by the electoral-vote matrix and convert the product back to variables.
* all1 is the product with state_initnum; all2 is the number of electoral votes the
* Democrats receive in each draw.
mat define all = state_simulations*EC_votes
svmat all

* Attach the cut-off: append the one-row file, bring that row to the top and copy its value down
append using  "SimulationData\total_EC_`forecastyear'_`p'.dta"
gsort -cut_off
replace cut_off = cut_off[1]

* Remove the appended row again (it has no simulated totals)
drop if all2 == .


* District-level electoral votes for Maine and Nebraska
* 2020 Elections Data Source:
*https://www.dailykos.com/stories/2018/2/21/1742660/-The-ultimate-Daily-Kos-Elections-guide-to-all-of-our-data-sets
* Daily Kos Elections statewide election results by congressional districts used from 2012-2020
* Accessed: 6/6/24

* Previous-election share (in percent), statewide and by district, from the vote counts below
generate PY_Maine = ((435072)/(435072+360737))*100
generate PY_Maine_D1 = ((266376)/(266376+164045))*100
generate PY_Maine_D2 = ((168696)/(168696+196692))*100

generate PY_Nebraska = ((374583)/(374583+556846))*100
generate PY_Nebraska_D1 = ((132261)/(132261+180290))*100
generate PY_Nebraska_D2 = ((176468)/(176468+154377))*100
generate PY_Nebraska_D3 = ((65854)/(65854+222179))*100

* Statewide shift relative to the previous election, applied uniformly to every district
local districts_Maine D1 D2
local districts_Nebraska D1 D2 D3
foreach st in Maine Nebraska {
	generate `st'_shift = `st' - PY_`st'
	foreach dd of local districts_`st' {
		generate `st'_`dd' = `st'_shift + PY_`st'_`dd'
	}
}

* Indicator per district: 1 when the simulated share is above 50, otherwise 0
local all_districts Maine_D1 Maine_D2 Nebraska_D1 Nebraska_D2 Nebraska_D3
foreach dist of local all_districts {
	gen `dist'_demwin = (`dist' > 50)
}

* all3 = all2 plus one electoral vote for every district won
gen all3 = all2
foreach dist of local all_districts {
	replace all3 = all3 + 1 if `dist'_demwin == 1
}


* Electoral College outcome per draw: 1 when the Democratic total (all3) reaches the cut-off, otherwise 0
gen demwin = (all3 >= cut_off)

label define demwin 0 "Republicans win EC" 1 "Democrats win EC"
label values demwin demwin

tabulate demwin

* Record which weight parameter produced these draws
gen parameter = `p'

* Store the results of this model in the separate folder "SimulationData"
save "SimulationData\histogram_data_`forecastyear'_`p'.dta", replace
}

* Stack the result files of all weight models, starting with the lowest parameter
use "SimulationData\histogram_data_2024_`min'.dta", clear
local min2 = `min' + 1
forvalues p = `min2'/`max' {
	append using "SimulationData\histogram_data_2024_`p'.dta"
}

* Store the combined file in the separate folder "SimulationData"
save "SimulationData\histogram_data_2024_all_`sims'.dta", replace

****************************************
* Percent of simulations with a Harris victory
****************************************
tabulate demwin
****************************************
****************************************
****************************************


****************************************
***Figure 2
****************************************
* Histogram of the simulated Democratic electoral-vote totals (all3): totals below 269 in red,
* above 269 in blue, exactly 269 in gray, with a dashed vertical line at 269, a frame drawn from
* line segments, and arrows/text marking the Republican and Democratic sides.
graph set window fontface "Times New Roman"
* n = number of non-missing simulated totals
sum all3
local n = `r(N)'
*Calculate max frequency and percent for left side of graph
* NOTE(review): the bin frequencies here use all3<270, while the red histogram below uses
* all3 < 269; the maximum is also taken from the left side only - confirm this is intended.
twoway__histogram_gen all3 if all3<270, frequency width(2) gen(freq1 c, replace)
sum freq1 
local fmax = `r(max)'
*Set max height to above max frequency
* maximum      = top of the drawn frame (8% above the tallest left-side bar)
* arrowheight  = height of the two arrows, one third of the way from fmax to maximum
* ymaxperc     = frame height expressed as a percentage of all simulations, rounded for display
local maximum = 1.08*`fmax'
local arrowheight = `fmax'+(`maximum'-`fmax')/3
local ymaxperc = 100*`maximum'/`n'
local ymaxperc: di %5.0f `ymaxperc'
di `maximum'
di `ymaxperc'
* Coordinates used for the frame and the reference line
local minimum = 0
local ECwin = 269
local ECmax = 538
* NOTE(review): the bars are frequencies (frequency option) while the y-axis title reads
* "Percent of Simulations"; the only percentage shown is the ymaxperc text placed at height fmax.
twoway (histogram all3 if all3 < 269, frequency width(2) bcolor(red) ///
xscale(noline range(0 538)) xlabel(100(100)538, nogrid tlwidth(.05))  ///
xtitle("Forecasted Number of EC Votes Won by the Democratic Candidate") ///
yscale(noline noextend lwidth(0.05))  ylabel( , nolabel nogrid notick) ///
ytitle("Percent of Simulations") graphregion(fcolor(white) lcolor(white)) ///
plotregion(margin(b=0 l=0 t=0) style(none)) ///
text(`fmax' 190 "Republican Victory" "(Trump)", place(w)) ///
text(`fmax' 340 "Democratic Victory" "  (Harris)", place(e))  ///
text(`fmax' `minimum' "`ymaxperc'%", place(w)) legend(off) ysize(3.5) xsize(5)) ///
(pcarrowi `arrowheight' 272 `arrowheight' 302, mlcolor(blue) mfcolor(blue) mcolor(blue) msize(vlarge) mlwidth(vthick) barbsize(2) lcolor(blue)) ///
(pcarrowi `arrowheight' 266 `arrowheight' 236, mlcolor(red) mfcolor(red) mcolor(red) msize(vlarge) mlwidth(vthick) barbsize(2) lcolor(red)) ///
(pci `minimum' `ECwin' `maximum' `ECwin', lcolor(black) lwidth(medthin) lpattern(dash)) ///
(pci `maximum' `minimum' `maximum' `ECmax', lcolor(black) lwidth(medthin)) ///
(pci `minimum' `minimum' `maximum' `minimum', lcolor(black) lwidth(medthin)) ///
(pci `minimum' `ECmax' `maximum' `ECmax', lcolor(black) lwidth(medthin)) ///
(pci `minimum' `minimum' `minimum' `ECmax', lcolor(gray) lwidth(vvthin)) ///
(histogram all3 if all3 > 269, frequency width(2) bcolor(blue)) ///
(histogram all3 if all3 == 269, frequency width(2) bcolor(gray))

******************************************



****************************************
***Figure A-2
****************************************
* install gr0034.pkg  (labmask)
*ssc install stripplot

* Units shown in Figure A-2: the 50 states, D.C. and the five Maine/Nebraska districts
local vlist Alaska Alabama Arkansas Arizona California Colorado Connecticut District_of_Columbia Delaware Florida Georgia Hawaii Iowa Idaho Illinois Indiana Kansas Kentucky Louisiana Massachusetts Maryland Maine Michigan Minnesota Missouri Mississippi Montana North_Carolina North_Dakota Nebraska New_Hampshire New_Jersey New_Mexico Nevada New_York Ohio Oklahoma Oregon Pennsylvania Rhode_Island South_Carolina South_Dakota Tennessee Texas Utah Virginia Vermont Washington Wisconsin West_Virginia Wyoming Maine_D1 Maine_D2 Nebraska_D1 Nebraska_D2 Nebraska_D3

* Keep only the simulated vote shares of these units and number the draws.
* FIX: the original macro reference had a blank before the closing quote, and the loop below
* had its opening brace attached directly to the macro name. Both are written in the standard
* form here so the macro name cannot be misread by the parser.
keep `vlist'
gen id = _n

* Give every unit the common stub "state" so the data can be reshaped to long form:
* one row per draw and unit, with the unit name stored in the string variable state_name
foreach var of local vlist {
	rename `var' state`var'
}
reshape long state, i(id) j(state_name) string

* Put the draws of every unit in random order and number them within the unit.
* FIX: the original wrote "sort state_name (random)"; the parentheses belong to the by-prefix
* syntax, not to -sort-. A plain two-key sort gives the intended order.
gen random = runiform()
sort state_name random
by state_name: gen long draw_id = _n

rename state dem_vote_share

* Mean simulated Democratic share per unit (used later to order and colour the units)
bysort state_name: egen mean = mean(dem_vote_share)

* Running observation number (kept because the original script created it)
gen num_obs = _n

* Number of draws per unit. The original hard-coded 70000 (7 weight models x 10000 draws);
* counting one unit gives the same value and stays correct if either number changes.
quietly count if state_name == "Alabama"
local sims = r(N)

* Number of draws displayed per electoral vote
local EV_one = round(`sims'/58)

* Thin the plotted draws so that the number of points per unit is proportional to its electoral
* votes: within each unit, draws numbered EV*EV_one or higher are set to missing.
* FIX: the multipliers in the original were the 2012-2020 apportionment (e.g. California 55,
* Texas 38, Florida 29), which contradicted the 2024 counts used in the unit labels. The list
* below uses the 2024 counts for every unit. Using the within-unit draw number also removes the
* dependence on each unit occupying a fixed block of rows.
foreach pair in ///
    Alabama:9 Alaska:3 Arizona:11 Arkansas:6 California:54 Colorado:10 Connecticut:7 ///
    Delaware:3 District_of_Columbia:3 Florida:30 Georgia:16 Hawaii:4 Idaho:4 Illinois:19 ///
    Indiana:11 Iowa:6 Kansas:6 Kentucky:8 Louisiana:8 Maine:2 Maine_D1:1 Maine_D2:1 ///
    Maryland:10 Massachusetts:11 Michigan:15 Minnesota:10 Mississippi:6 Missouri:10 ///
    Montana:4 Nebraska:2 Nebraska_D1:1 Nebraska_D2:1 Nebraska_D3:1 Nevada:6 ///
    New_Hampshire:4 New_Jersey:14 New_Mexico:5 New_York:28 North_Carolina:16 ///
    North_Dakota:3 Ohio:17 Oklahoma:7 Oregon:8 Pennsylvania:19 Rhode_Island:4 ///
    South_Carolina:9 South_Dakota:3 Tennessee:11 Texas:40 Utah:6 Vermont:3 Virginia:13 ///
    Washington:12 West_Virginia:4 Wisconsin:10 Wyoming:3 {
	* Split "Unit:EV" into the unit name and its number of electoral votes
	gettoken unit ev : pair, parse(":")
	local ev : subinstr local ev ":" ""
	replace dem_vote_share = . if draw_id >= `ev'*`EV_one' & state_name == "`unit'"
}
drop draw_id


* Replace every unit name by its display label: the name written with blanks instead of
* underscores ("D.C." for the District of Columbia, "State: Dn" for districts), followed by
* its number of electoral votes in parentheses
replace state_name = "Alabama (9)" if state_name == "Alabama"
replace state_name = "Alaska (3)" if state_name == "Alaska"
replace state_name = "Arizona (11)" if state_name == "Arizona"
replace state_name = "Arkansas (6)" if state_name == "Arkansas"
replace state_name = "California (54)" if state_name == "California"
replace state_name = "Colorado (10)" if state_name == "Colorado"
replace state_name = "Connecticut (7)" if state_name == "Connecticut"
replace state_name = "Delaware (3)" if state_name == "Delaware"
replace state_name = "D.C. (3)" if state_name == "District_of_Columbia"
replace state_name = "Florida (30)" if state_name == "Florida"
replace state_name = "Georgia (16)" if state_name == "Georgia"
replace state_name = "Hawaii (4)" if state_name == "Hawaii"
replace state_name = "Idaho (4)" if state_name == "Idaho"
replace state_name = "Illinois (19)" if state_name == "Illinois"
replace state_name = "Indiana (11)" if state_name == "Indiana"
replace state_name = "Iowa (6)" if state_name == "Iowa"
replace state_name = "Kansas (6)" if state_name == "Kansas"
replace state_name = "Kentucky (8)" if state_name == "Kentucky"
replace state_name = "Louisiana (8)" if state_name == "Louisiana"
replace state_name = "Maine (2)" if state_name == "Maine"
replace state_name = "Maine: D1 (1)" if state_name == "Maine_D1"
replace state_name = "Maine: D2 (1)" if state_name == "Maine_D2"
replace state_name = "Maryland (10)" if state_name == "Maryland"
replace state_name = "Massachusetts (11)" if state_name == "Massachusetts"
replace state_name = "Michigan (15)" if state_name == "Michigan"
replace state_name = "Minnesota (10)" if state_name == "Minnesota"
replace state_name = "Mississippi (6)" if state_name == "Mississippi"
replace state_name = "Missouri (10)" if state_name == "Missouri"
replace state_name = "Montana (4)" if state_name == "Montana"
replace state_name = "Nebraska (2)" if state_name == "Nebraska"
replace state_name = "Nebraska: D1 (1)" if state_name == "Nebraska_D1"
replace state_name = "Nebraska: D2 (1)" if state_name == "Nebraska_D2"
replace state_name = "Nebraska: D3 (1)" if state_name == "Nebraska_D3"
replace state_name = "Nevada (6)" if state_name == "Nevada"
replace state_name = "New Hampshire (4)" if state_name == "New_Hampshire"
replace state_name = "New Jersey (14)" if state_name == "New_Jersey"
replace state_name = "New Mexico (5)" if state_name == "New_Mexico"
replace state_name = "New York (28)" if state_name == "New_York"
replace state_name = "North Carolina (16)" if state_name == "North_Carolina"
replace state_name = "North Dakota (3)" if state_name == "North_Dakota"
replace state_name = "Ohio (17)" if state_name == "Ohio"
replace state_name = "Oklahoma (7)" if state_name == "Oklahoma"
replace state_name = "Oregon (8)" if state_name == "Oregon"
replace state_name = "Pennsylvania (19)" if state_name == "Pennsylvania"
replace state_name = "Rhode Island (4)" if state_name == "Rhode_Island"
replace state_name = "South Carolina (9)" if state_name == "South_Carolina"
replace state_name = "South Dakota (3)" if state_name == "South_Dakota"
replace state_name = "Tennessee (11)" if state_name == "Tennessee"
replace state_name = "Texas (40)" if state_name == "Texas"
replace state_name = "Utah (6)" if state_name == "Utah"
replace state_name = "Vermont (3)" if state_name == "Vermont"
replace state_name = "Virginia (13)" if state_name == "Virginia"
replace state_name = "Washington (12)" if state_name == "Washington"
replace state_name = "West Virginia (4)" if state_name == "West_Virginia"
replace state_name = "Wisconsin (10)" if state_name == "Wisconsin"
replace state_name = "Wyoming (3)" if state_name == "Wyoming"

* Colour indicator for the strip plot: 1 when the unit's mean simulated share is above 50,
* 0 when it is below 50 (a mean of exactly 50 leaves blue missing)
gen blue =1 if mean >50
replace blue =0 if mean <50
* 
* NOTE(review): this descending sort is immediately superseded by the ascending sort that
* "bysort mean" performs on the next statement.
gsort- mean
* This is a bit complicated and needs to be redone if the mean changes!!!
* Number the units by ascending mean: flag the first row of every distinct mean with 1, then
* turn the flags into a running total, so all rows of a unit carry the same id
* (units with exactly equal means would share an id).
bysort mean: gen new_id=1 if _n==1

replace new_id = sum(new_id)
replace new_id = . if missing(mean)

* NOTE(review): newid (alphabetical group number of state_name) is not used anywhere in the visible script.
egen newid = group(state_name)

* ssc install  labutil --> this way a variable takes the values of another variable as value labels!
* Attach the display labels in state_name as value labels of new_id
labmask new_id, values(state_name)

* get rid of values over 100 (DC has some out-of-bounds predictions):
replace dem_vote_share =. if dem_vote_share >100 & dem_vote_share!=.

* Generate the number of EV per state
* The last three characters of the label are "(n)" or "nn)"; stripping the parentheses
* leaves the number of electoral votes as text
gen EV  = substr(state_name,-3,.)
replace EV = subinstr(EV, "(", "",.) 
replace EV = subinstr(EV, ")", "",.) 

* destring
destring EV, replace

* sort according to new_id
* (the cumulative electoral-vote count that follows looks back a fixed number of rows,
* so it relies on the rows of each unit being contiguous and in new_id order)
sort new_id 


* Cumulative electoral votes, counted upwards from the unit with the lowest mean Democratic
* share (new_id = 1). The data are sorted by new_id and every unit has the same number of rows
* (the value of the sims local), so the row that many positions back belongs to the previous unit.
* FIX: the original condition was "_n > 1". For rows 2 up to sims the look-back row does not
* exist, so test was set to missing there and, through the chain of look-backs, for all but one
* row of every unit; the threshold search only worked because missing compares as larger than
* any number. Restricting the update to rows that have a predecessor keeps test defined on
* every row and leaves the resulting locals unchanged.
gen test = EV
replace test = EV + test[_n-`sims'] if _n > `sims'

* Identify the threshold moment: the unit at which the cumulative count first reaches 269
gen threshold = 1 if test>=269 & test[_n-`sims'] <269

* Store the position (new_id) of that unit
replace threshold = new_id if threshold==1

* egen the threshold first before one can assign it to a local.
* threshold2 is the number of units counted from the bottom of the figure.
egen threshold2=min(threshold)

* Rows belonging to the threshold unit
generate match = (threshold == threshold2)

* Positions used for the lines, arrows and text on the Democratic side of the graph below
local dem_win=threshold2
local dem_winM=threshold2-1
local dem_winPLUS1=threshold2+1
display `dem_winPLUS1'
local dem_winPLUS_5=threshold2+.5

local dem_win_plus=threshold2

* Electoral votes on either side of the threshold unit:
*   extracted_value = cumulative count up to and including the threshold unit
*   EC_flip         = electoral votes of the threshold unit itself
*   dem_EC          = votes of all units above the threshold unit
*   rep_EC          = votes of all units below the threshold unit
summarize test if match
local extracted_value = r(mean)
summarize EV if match
local EC_flip = r(mean)
display "`EC_flip'"
local dem_EC = 538 - `extracted_value'
display "`dem_EC'"
local rep_EC = `extracted_value'-`EC_flip'
display "`rep_EC'"

* Positions used for the lines, arrows and text on the Republican side of the graph below
local rep_winPLUS_5=threshold2-.5
local rep_win=threshold2-1
local rep_winPLUS1=threshold2-2

* Figure A-2: strip plot (ssc install stripplot) of the simulated Democratic vote shares, one
* row per unit in new_id order, with the points split and coloured by the blue indicator.
* Overlays added through plot():
*   - a solid vertical line at a vote share of 50
*   - a dashed blue line at dem_winPLUS_5 (from 0 to 50) and a dashed red line at rep_winPLUS_5 (from 50 to 100)
*   - two arrows at vote shares 80 and 25 pointing at the threshold unit
* The text boxes show dem_EC and rep_EC, all computed from the cumulative electoral-vote count above.
* NOTE(review): the vertical line runs from 0 to 57, which presumably spans the 56 plotted units - confirm.
stripplot dem_vote_share, over (new_id) separate(blue) mcolor(red*.6 blue) width(.1) ///
height(1.2) ms(p p)  stack legend(off) xtitle("") xscale(noextend noline) xlabel(,nogrid) xlabel(0(25)100) ytitle("") ///
ylabel(,labsize(small) glwidth(thin) glpattern(dot)  glcolor(gray))  variablelabels yscale(noline)    ///
plotregion(margin(l-2.25 r-2 b-6 t-6) lwidth(vthin) lcolor(black)) ///
graphregion(fcolor(white) lcolor(white)) ///
plot((pci 0 50 57 50, lcolor(black)) ///
(pci `dem_winPLUS_5' 0 `dem_winPLUS_5' 50, lpattern(dash) lcolor(blue)) ///
(pci `rep_winPLUS_5' 50 `rep_winPLUS_5' 100, lpattern(dash) lcolor(red)) ///
(pcarrowi `rep_win' 80 `dem_win_plus' 80, lcolor(black) msize(1) mcolor(black))  ///
(pcarrowi `dem_winPLUS1' 25 `dem_win' 25, lcolor(black) msize(1) mcolor(black)))  ///
text(`dem_winPLUS1' 25 "Harris" "needs one" "more state" "to win" "(`dem_EC')", place(n)) ///
text(`rep_winPLUS1' 80 "(`rep_EC')" "Trump" "needs one" "more state" "to win", place(s)) ///
ysize(12)


